Replicating pipeline and results described in Galbraith et al. (2016). PNAS. https://www.pnas.org/content/113/4/1020
if(suppressWarnings(require("purrr"))==F){
install.packages("purrr")
library(purrr)
}
if(suppressWarnings(require("tidyverse"))==F){
install.packages("tidyverse")
library(tidyverse)
}
if(suppressWarnings(require("plyr"))==F){
install.packages("plyr")
library(plyr)
}
if(suppressWarnings(require("ggplot2"))==F){
install.packages("ggplot2")
library(ggplot2)
}
if(suppressWarnings(require("Rfast"))==F){
install.packages("Rfast")
library(Rfast)
}
if(suppressWarnings(require("tryCatchLog"))==F){
install.packages("tryCatchLog")
library(tryCatchLog)
}
if(suppressWarnings(require("lme4"))==F){
install.packages("lme4")
library(lme4)
}
if(suppressWarnings(require("car"))==F){
install.packages("car")
library(car)
}
if(suppressWarnings(require("viridis"))==F){
install.packages("viridis")
library(viridis)
}
if(suppressWarnings(require("gridExtra"))==F){
install.packages("gridExtra")
library(gridExtra)
}
if(suppressWarnings(require("kableExtra"))==F){
install.packages("kableExtra")
library(kableExtra)
}
metadata <- read.csv("metadata_OD.csv")
kbl(metadata) %>% kable_styling()
| sample.id | parent | cross | phenotype | lineage |
|---|---|---|---|---|
| X8751ID | 875D | A | sterile | EHB |
| X8756ID | 875D | A | sterile | EHB |
| X875aID | 875D | A | sterile | EHB |
| X8827ID | 882D | B | sterile | EHB |
| X882gID | 882D | B | sterile | EHB |
| X882kID | 882D | B | sterile | EHB |
| X882mID | 882D | B | sterile | EHB |
| X888eID | 888D | A | sterile | AHB |
| X888qID | 888D | A | sterile | AHB |
| X888uID | 888D | A | sterile | AHB |
| X894cID | 894D | B | sterile | AHB |
| X894fID | 894D | B | sterile | AHB |
| X894iID | 894D | B | sterile | AHB |
| X8751IQ | 875Q | A | sterile | EHB |
| X8756IQ | 875Q | A | sterile | EHB |
| X875aIQ | 875Q | A | sterile | EHB |
| X8827IQ | 882Q | B | sterile | EHB |
| X882giQ | 882Q | B | sterile | EHB |
| X882kIQ | 882Q | B | sterile | EHB |
| X882mIQ | 882Q | B | sterile | EHB |
| X888eIQ | 888Q | A | sterile | AHB |
| X888qIQ | 888Q | A | sterile | AHB |
| X888uIQ | 888Q | A | sterile | AHB |
| X894cIQ | 894Q | B | sterile | AHB |
| X894fIQ | 894Q | B | sterile | AHB |
| X894iIQ | 894Q | B | sterile | AHB |
| X8754AD | 875D | A | reproductive | EHB |
| X875cAD | 875D | A | reproductive | EHB |
| X875gAD | 875D | A | reproductive | EHB |
| X8820AD | 882D | B | reproductive | EHB |
| X882cAD | 882D | B | reproductive | EHB |
| X882pAD | 882D | B | reproductive | EHB |
| X888aAD | 888D | A | reproductive | AHB |
| X888hAD | 888D | A | reproductive | AHB |
| X888jAD | 888D | A | reproductive | AHB |
| X894bAD | 894D | B | reproductive | AHB |
| X894qAD | 894D | B | reproductive | AHB |
| X894rAD | 894D | B | reproductive | AHB |
| X8754AQ | 875Q | A | reproductive | EHB |
| X875cAQ | 875Q | A | reproductive | EHB |
| X875gAQ | 875Q | A | reproductive | EHB |
| X8820AQ | 882Q | B | reproductive | EHB |
| X882caQ | 882Q | B | reproductive | EHB |
| X882pAQ | 882Q | B | reproductive | EHB |
| X888aAQ | 888Q | A | reproductive | AHB |
| X888hAQ | 888Q | A | reproductive | AHB |
| X888jAQ | 888Q | A | reproductive | AHB |
| X894bAQ | 894Q | B | reproductive | AHB |
| X894qAQ | 894Q | B | reproductive | AHB |
| X894rAQ | 894Q | B | reproductive | AHB |
SNPs_for_analysis.bed, keep columns 1 (chromosome), 2 (position), and 4 (SNP:gene).SNPs <- read.table("sNPs_for_analysis_sorted.bed",sep="\t",header=F)[,c(1,2,4)]
names(SNPs) <- c("chr","pos","SNP_gene")
SNPs$SNP <- as.character(map(strsplit(SNPs$SNP_gene, split = ":"), 1))
SNPs$geneID <- as.character(map(strsplit(SNPs$SNP_gene, split = ":"), 2))
write.table(SNPs,"SNP_gene_overlaps.txt",sep="\t",quote=F,row.names=F)
kbl(head(SNPs)) %>% kable_styling()
SNPs <- read.table("SNP_gene_overlaps.txt",sep="\t",header=1)
SNP_counts with as many rows as SNP:genes.*.txt files in the counts_SNPs directory.SNP_counts <- data.frame(matrix(ncol=0,nrow=length(SNPs$SNP_gene)))
SNP_counts$SNP_gene <- SNPs$SNP_gene
files.counts <- list.files(path="counts_tophat2",
pattern="*.txt",
full.names=TRUE,
recursive=FALSE)
head(files.counts)
files.counts:tmp, keep columns 4 (SNP:gene) and 7 (counts).tmp.name.tmp to column 1 (SNP:gene) and column 2 (tmp.name).tmp to SNP_counts.SNP_counts to the SNP:gene IDs stored in column 1 (SNP:gene).for(i in 1:length(files.counts)){
tmp <- read.table(files.counts[[i]],header=F)[,c(4,7)]
tmp.name <- strsplit(strsplit(files.counts[[i]],
split = "/")[[1]][2],
split = "[.]")[[1]][1]
names(tmp) <- c("SNP_gene",tmp.name)
SNP_counts <- SNP_counts %>%
left_join(tmp, by = c('SNP_gene' = 'SNP_gene'))
}
row.names(SNP_counts) <- SNP_counts$SNP_gene
SNP_counts$SNP_gene <- NULL
SNP_counts[is.na(SNP_counts)] <- 0
write.csv(SNP_counts,"SNP_gene_counts.csv")
kbl(head(SNP_counts[,c(1:5)])) %>% kable_styling()
sterile_counts <- read.csv("GSE76164_PSGE_sterile.csv")
sterile_counts <- sterile_counts[!sterile_counts$ID==".",]
row.names(sterile_counts) <- paste(sterile_counts$ID,sterile_counts$SNP.pos,sep=":")
reproductive_counts <- read.csv("GSE76164_PSGE_reproductive.csv")
reproductive_counts <- reproductive_counts[!reproductive_counts$ID==".",]
row.names(reproductive_counts) <- paste(reproductive_counts$ID,reproductive_counts$SNP.pos,sep=":")
Remove rows where any column has <=lcf reads by cross.
lcf <- 5
sterile_counts <- sterile_counts[rowSums(sterile_counts[,4:29])>lcf*(length(names(sterile_counts[,4:29]))/2),]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[,4:27])>lcf*(length(names(reproductive_counts[,4:27]))/2),]
l875.s <- metadata[metadata$parent%in%c("875D","875Q")&metadata$phenotype=="sterile","sample.id"]
l888.s <- metadata[metadata$parent%in%c("888D","888Q")&metadata$phenotype=="sterile","sample.id"]
l882.s <- metadata[metadata$parent%in%c("882D","882Q")&metadata$phenotype=="sterile","sample.id"]
l894.s <- metadata[metadata$parent%in%c("894D","894Q")&metadata$phenotype=="sterile","sample.id"]
sterile_counts <- sterile_counts[rowSums(sterile_counts[,l875.s])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[,l888.s])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[,l882.s])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[,l894.s])>lcf,]
l875.r <- metadata[metadata$parent%in%c("875D","875Q")&metadata$phenotype=="reproductive","sample.id"]
l888.r <- metadata[metadata$parent%in%c("888D","888Q")&metadata$phenotype=="reproductive","sample.id"]
l882.r <- metadata[metadata$parent%in%c("882D","882Q")&metadata$phenotype=="reproductive","sample.id"]
l894.r <- metadata[metadata$parent%in%c("894D","894Q")&metadata$phenotype=="reproductive","sample.id"]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[,l875.r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[,l888.r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[,l882.r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[,l894.r])>lcf,]
### Remove rows with greater than 10000 counts (cannot run SK test on these)
reproductive_counts$SUM <- rowSums(reproductive_counts[,4:27])
reproductive_counts <- reproductive_counts[reproductive_counts$SUM<10000,]
reproductive_counts$SUM <- NULL
sterile_counts$SUM <- rowSums(sterile_counts[,4:29])
sterile_counts <- sterile_counts[sterile_counts$SUM<10000,]
sterile_counts$SUM <- NULL
### Remove rows with greater than 10000 counts (cannot run SK test on these)
length(row.names(reproductive_counts))
## [1] 36216
length(row.names(sterile_counts))
## [1] 36683
| p | pat/mat | lineage |
|---|---|---|
| p1 | pat | AHB |
| p1 | mat | EHB |
| p2 | pat | EHB |
| p2 | mat | AHB |
# Sterile
## p1
p1.sterile.pat <- sterile_counts[,metadata[metadata$parent%in%c("875D","882D")&
metadata$phenotype=="sterile","sample.id"]]
p1.sterile.mat <- sterile_counts[,metadata[metadata$parent%in%c("875Q","882Q")&
metadata$phenotype=="sterile","sample.id"]]
names(p1.sterile.pat) <- names(p1.sterile.mat)
## p2
p2.sterile.pat <- sterile_counts[,metadata[metadata$parent%in%c("888D","894D")&
metadata$phenotype=="sterile","sample.id"]]
p2.sterile.mat <- sterile_counts[,metadata[metadata$parent%in%c("888Q","894Q")&
metadata$phenotype=="sterile","sample.id"]]
names(p2.sterile.pat) <- names(p2.sterile.mat)
# Reproductive
## p1
p1.reproductive.pat <- reproductive_counts[,metadata[metadata$parent%in%c("875D","882D")&
metadata$phenotype=="reproductive","sample.id"]]
p1.reproductive.mat <- reproductive_counts[,metadata[metadata$parent%in%c("875Q","882Q")&
metadata$phenotype=="reproductive","sample.id"]]
names(p1.reproductive.pat) <- names(p1.reproductive.mat)
## p2
p2.reproductive.pat <- reproductive_counts[,metadata[metadata$parent%in%c("888D","894D")&
metadata$phenotype=="reproductive","sample.id"]]
p2.reproductive.mat <- reproductive_counts[,metadata[metadata$parent%in%c("888Q","894Q")&
metadata$phenotype=="reproductive","sample.id"]]
names(p2.reproductive.pat) <- names(p2.reproductive.mat)
twobinom](Function from the WRS2 package). Test the hypothesis that two independent binomials have equal probability of success using the Storer–Kim method.
twobinom<-function(r1=sum(elimna(x)),n1=length(x),
r2=sum(elimna(y)),n2=length(y),
x=NA,y=NA,alpha=.05){
# Modified for efficiency: changed outer() command to Rfast::Outer()
# Requires Rfast & tryCatchLog packages!
# r1=number of successes in group 1
# r2=number of successes in group 2
# n1=number of observations in group 1
# n2=number of observations in group 2
n1p<-n1+1
n2p<-n2+1
n1m<-n1-1
n2m<-n2-1
chk<-abs(r1/n1-r2/n2)
x<-c(0:n1)/n1
y<-c(0:n2)/n2
phat<-(r1+r2)/(n1+n2)
m1<-t(Outer(x,y,"-"))
m2<-matrix(1,n1p,n2p)
flag<-(abs(m1)>=chk)
m3<-m2*flag
rm(m1,m2,flag)
b1<-1
b2<-1
xv<-c(1:n1)
yv<-c(1:n2)
xv1<-n1-xv+1
yv1<-n2-yv+1
dis1<-c(1,pbeta(phat,xv,xv1))
dis2<-c(1,pbeta(phat,yv,yv1))
pd1<-NA
pd2<-NA
for(i in 1:n1){pd1[i]<-dis1[i]-dis1[i+1]}
for(i in 1:n2){pd2[i]<-dis2[i]-dis2[i+1]}
pd1[n1p]<-phat^n1
pd2[n2p]<-phat^n2
m4<-t(Outer(pd1,pd2,"*"))
test<-sum(m3*m4)
rm(m3,m4)
list(p.value=test,p1=r1/n1,p2=r2/n2,est.dif=r1/n1-r2/n2)
}
test.df]high and low], set the associated test p-value to the Storer-Kim test p-value; otherwise, set to 1.test.df <- function(p1.pat.df,p1.mat.df,
p2.pat.df,p2.mat.df,high,low,
stest=c("CS","SK","thresholds"),
startrow=1,endrow="n"){
# Requires Rfast & tryCatchLog packages!
if(endrow=="n"){endrow <- length(row.names(p1.pat.df))}
return.list <- list()
p1.pat.df <- p1.pat.df[startrow:endrow,]
p1.mat.df <- p1.mat.df[startrow:endrow,]
p2.pat.df <- p2.pat.df[startrow:endrow,]
p2.mat.df <- p2.mat.df[startrow:endrow,]
i.len <- length(row.names(p1.pat.df))
for(i in 1:i.len){
print(i)
p1.pat <- p1.pat.df[i,]
p1.mat <- p1.mat.df[i,]
p2.pat <- p2.pat.df[i,]
p2.mat <- p2.mat.df[i,]
p1.s <- sum(p1.pat)
p1.o <- sum(p1.pat,p1.mat)
p1 <- p1.s/p1.o
if(is.na(p1)){p1 <- 0}
p2.s <- sum(p2.mat)
p2.o <- sum(p2.pat,p2.mat)
p2 <- p2.s/p2.o
if(is.na(p2)){p2 <- 0}
if(stest=="SK"){
tryCatchLog::tryCatchLog(test <- twobinom(r1=p1.s,n1=p1.o,r2=p2.s,n2=p2.o)$p.value,
error = function(e) {test <- "MEM.EXHAUST"})
}
if(stest=="CS"){
test <- chisq.test(data.frame(X=c(p1.s,p2.s),Y=c(p1.o,p2.o)))$p.value
}
if(stest=="thresholds"){
test <- 0
}
if(is.nan(test)){test <- 1}
if(p1>high&p2<low){pat.test <- test}else{pat.test <- 1}
if(p1<low&p2>high){mat.test <- test}else{mat.test <- 1}
if(p1<low&p2<low){EHB.test <- test}else{EHB.test <- 1}
if(p1>high&p2>high){AHB.test <- test}else{AHB.test <- 1}
return.df <- data.frame(SNP_gene=row.names(p1.pat.df[i,]),
pat.p=pat.test,mat.p=mat.test,
AHB.p=AHB.test,EHB.p=EHB.test)
return.list[[i]] <- return.df
}
return(bind_rows(return.list))
}
GLIMMIX]GLIMMIX <- function(counts,metadata){
# Requires tryCatchLog package!
counts$SNP_gene <- row.names(counts)
parent.list <- list()
parent.p.list <- list()
lineage.list <- list()
lineage.p.list <- list()
counts$geneID <- as.character(unlist(map(strsplit(counts$SNP_gene, split = ":"), 1)))
genelist <- unique(counts$geneID)
counts <- counts[,-c(1:3)]
for(i in 1:length(genelist)){
print(paste("Row ",i," of ",length(genelist),sep=""))
counts.sub <- counts[counts$geneID==genelist[i],]
counts.sub$geneID <- NULL
counts.sub <- gather(counts.sub, sample.id, count,
names(counts.sub), -SNP_gene, factor_key=TRUE)
counts.sub <- join(counts.sub, metadata, by = "sample.id")[,-c(5:6)]
counts.sub$parent <- substr(counts.sub$parent,4,4)
counts.sub$parent <- as.factor(counts.sub$parent)
counts.sub$SNP_gene <- as.factor(counts.sub$SNP_gene)
counts.sub$lineage <- as.factor(counts.sub$lineage)
counts.sub$sample.id <- as.factor(counts.sub$sample.id)
testfail <- F
test <- "null"
if(length(levels(counts.sub$SNP_gene))>1){
tryCatchLog(test <- lmer(count~parent+lineage+parent*lineage+(1|SNP_gene)+(1|sample.id),
data=counts.sub),
error = function(e) {testfail <- T})
}else{
tryCatchLog(test <- lmer(count~parent+lineage+parent*lineage+(1|sample.id),
data=counts.sub),
error = function(e) {testfail <- T})
}
if(class(test)=="character"){testfail <- T}
if(testfail==F){
test.p <- Anova(test)
test <- summary(test)
parent.list[i] <- test[["coefficients"]][2,1]
parent.p.list[i] <- test.p[1,3]
lineage.list[i] <- test[["coefficients"]][3,1]
lineage.p.list[i] <- test.p[2,3]
}else{
parent.list[i] <- 0
parent.p.list[i] <- 1
lineage.list[i] <- 0
lineage.p.list[i] <- 1
}
cat("\014")
}
return(data.frame(ID=genelist,
parent.dirpat.est=unlist(parent.list),
parent.dirpat.p=unlist(parent.p.list),
lineageEHB.est=unlist(lineage.list),
lineageEHB.p=unlist(lineage.p.list)))
}
sterile.SK <- test.df(p1.sterile.pat,p1.sterile.mat,
p2.sterile.pat,p2.sterile.mat,
low=0.4,high=0.6,"SK")
write.csv(sterile.SK,"sterile_SK.csv")
sterile.GLIMMIX <- GLIMMIX(sterile_counts,metadata)
write.csv(sterile.GLIMMIX,"sterile_GLIMMIX.csv")
reproductive.SK <- test.df(p1.reproductive.pat,p1.reproductive.mat,
p2.reproductive.pat,p2.reproductive.mat,
low=0.4,high=0.6,"SK")
write.csv(reproductive.SK,"reproductive_SK.csv")
reproductive.GLIMMIX <- GLIMMIX(reproductive_counts,metadata)
write.csv(reproductive.GLIMMIX,"reproductive_GLIMMIX.csv")
Description under construction.
## p1 = sum(A in ExA)/(A in ExA + E in ExA)
p1.sterile.plot <- data.frame(rowSums(p1.sterile.pat)/(rowSums(p1.sterile.mat)+
rowSums(p1.sterile.pat)))
names(p1.sterile.plot) <- c("p1")
## p2 = sum(A in AxE)/(A in AxE + E in AxE)
p2.sterile.plot <- data.frame(rowSums(p2.sterile.mat)/(rowSums(p2.sterile.mat)+
rowSums(p2.sterile.pat)))
names(p2.sterile.plot) <- c("p2")
sterile.plot <- cbind(p1.sterile.plot,p2.sterile.plot)
sterile.plot <- sterile.plot[row.names(sterile.plot)%in%sterile.SK$SNP_gene,]
sterile.plot <- cbind(sterile.plot,sterile.SK[,c(2:5)])
sterile.plot$SNP_gene <- row.names(sterile.plot)
sterile.plot$gene <- as.character(map(strsplit(sterile.plot$SNP_gene, split = ":"), 1))
sterile.plot$bias <- "NA"
sterile.plot[sterile.plot$pat.p<0.05,"bias"] <- "pat"
sterile.plot[sterile.plot$mat.p<0.05,"bias"] <- "mat"
sterile.plot[sterile.plot$EHB.p<0.05,"bias"] <- "EHB"
sterile.plot[sterile.plot$AHB.p<0.05,"bias"] <- "AHB"
biaslist <- data.frame(matrix(ncol=2,nrow=0))
names(biaslist) <- c("gene","bias")
genelist <- unique(sterile.plot$gene)
for(i in 1:length(genelist)){
tmp <- unique(sterile.plot[sterile.plot$gene==genelist[i],"bias"])
if(length(tmp)>1){
if(length(tmp)==2){
if(any(tmp%in%"NA")){
bias <- tmp[!tmp%in%"NA"]
}
}else{
bias <- "NA"
}
}else{bias <- tmp}
biaslist <- rbind(biaslist,data.frame(gene=genelist[[i]], bias=bias))
}
sterile.plot <- sterile.plot %>%
left_join(biaslist, by = c('gene' = 'gene'))
names(sterile.plot) <- c("p1","p2","pat.p","mat.p",
"AHB.p","EHB.p","SNP_gene","gene","xbias","bias")
bias.plot <- list()
for(i in 1:length(row.names(sterile.plot))){
if(!sterile.plot[i,"bias"]=="NA"){
bias.plot[i] <- "NA"
if(sterile.plot[i,"bias"]=="EHB"&sterile.plot[i,"p1"]<0.4&sterile.plot[i,"p2"]<0.4){
bias.plot[i] <- "EHB"
}
if(sterile.plot[i,"bias"]=="mat"&sterile.plot[i,"p1"]<0.4&sterile.plot[i,"p2"]>0.6){
bias.plot[i] <- "mat"
}
if(sterile.plot[i,"bias"]=="AHB"&sterile.plot[i,"p1"]>0.6&sterile.plot[i,"p2"]>0.6){
bias.plot[i] <- "AHB"
}
if(sterile.plot[i,"bias"]=="pat"&sterile.plot[i,"p1"]>0.6&sterile.plot[i,"p2"]<0.4){
bias.plot[i] <- "pat"
}
}else{
bias.plot[i] <- "NA"
}
}
sterile.plot$bias.plot <- unlist(bias.plot)
sterile.plot <- rbind(sterile.plot[sterile.plot$bias.plot%in%c("NA"),],
sterile.plot[sterile.plot$bias.plot%in%c("mat", "AHB", "EHB", "pat"),])
sterile.plot$bias.plot <- factor(sterile.plot$bias.plot,
levels = c("NA","mat", "AHB", "EHB", "pat"))
sterile.GLIMMIX.biased <- sterile.GLIMMIX[sterile.GLIMMIX$parent.dirpat.p<0.05|sterile.GLIMMIX$lineageEHB.p<0.05,1]
sterile.plot[!sterile.plot$gene%in%sterile.GLIMMIX.biased,"bias.plot"] <- "NA"
g1 <- ggplot(sterile.plot, aes(x=p1, y=p2,
color=bias.plot,alpha=0.8)) +
geom_point() + theme_classic() +
xlab(expression(paste("% A allele in ",E[mother],
" x ",A[father],sep=""))) +
ylab(expression(paste("% A allele in ",A[mother],
" x ",E[father],sep=""))) +
ggtitle("sterile workers") +
theme(text = element_text(size=20),
plot.title = element_text(hjust = 0.5)) +
scale_color_manual(breaks = levels(sterile.plot$bias)[-c(1)],
values=c("grey", magma(20)[c(2)],
magma(20)[c(8)], magma(20)[c(12)],
magma(20)[c(16)]))+
guides(alpha=F, color=F)
g1
## p1 = sum(A in ExA)/(A in ExA + E in ExA)
p1.reproductive.plot <- data.frame(rowSums(p1.reproductive.pat)/(rowSums(p1.reproductive.mat)+
rowSums(p1.reproductive.pat)))
names(p1.reproductive.plot) <- c("p1")
## p2 = sum(A in AxE)/(A in AxE + E in AxE)
p2.reproductive.plot <- data.frame(rowSums(p2.reproductive.mat)/(rowSums(p2.reproductive.mat)+
rowSums(p2.reproductive.pat)))
names(p2.reproductive.plot) <- c("p2")
reproductive.plot <- cbind(p1.reproductive.plot,p2.reproductive.plot)
reproductive.plot <- reproductive.plot[row.names(reproductive.plot)%in%reproductive.SK$SNP_gene,]
reproductive.plot <- cbind(reproductive.plot,reproductive.SK[,c(2:5)])
reproductive.plot$SNP_gene <- row.names(reproductive.plot)
reproductive.plot$gene <- as.character(map(strsplit(reproductive.plot$SNP_gene, split = ":"), 1))
reproductive.plot$bias <- "NA"
reproductive.plot[reproductive.plot$pat.p<0.05,"bias"] <- "pat"
reproductive.plot[reproductive.plot$mat.p<0.05,"bias"] <- "mat"
reproductive.plot[reproductive.plot$EHB.p<0.05,"bias"] <- "EHB"
reproductive.plot[reproductive.plot$AHB.p<0.05,"bias"] <- "AHB"
biaslist <- data.frame(matrix(ncol=2,nrow=0))
names(biaslist) <- c("gene","bias")
genelist <- unique(reproductive.plot$gene)
for(i in 1:length(genelist)){
tmp <- unique(reproductive.plot[reproductive.plot$gene==genelist[i],"bias"])
if(length(tmp)>1){
if(length(tmp)==2){
if(any(tmp%in%"NA")){
bias <- tmp[!tmp%in%"NA"]
}
}else{
bias <- "NA"
}
}else{bias <- tmp}
biaslist <- rbind(biaslist,data.frame(gene=genelist[[i]], bias=bias))
}
reproductive.plot <- reproductive.plot %>%
left_join(biaslist, by = c('gene' = 'gene'))
names(reproductive.plot) <- c("p1","p2","pat.p","mat.p",
"AHB.p","EHB.p","SNP_gene","gene","xbias","bias")
bias.plot <- list()
for(i in 1:length(row.names(reproductive.plot))){
if(!reproductive.plot[i,"bias"]=="NA"){
bias.plot[i] <- "NA"
if(reproductive.plot[i,"bias"]=="EHB"&reproductive.plot[i,"p1"]<0.4&reproductive.plot[i,"p2"]<0.4){
bias.plot[i] <- "EHB"
}
if(reproductive.plot[i,"bias"]=="mat"&reproductive.plot[i,"p1"]<0.4&reproductive.plot[i,"p2"]>0.6){
bias.plot[i] <- "mat"
}
if(reproductive.plot[i,"bias"]=="AHB"&reproductive.plot[i,"p1"]>0.6&reproductive.plot[i,"p2"]>0.6){
bias.plot[i] <- "AHB"
}
if(reproductive.plot[i,"bias"]=="pat"&reproductive.plot[i,"p1"]>0.6&reproductive.plot[i,"p2"]<0.4){
bias.plot[i] <- "pat"
}
}else{
bias.plot[i] <- "NA"
}
}
reproductive.plot$bias.plot <- unlist(bias.plot)
reproductive.plot <- rbind(reproductive.plot[reproductive.plot$bias.plot%in%c("NA"),],
reproductive.plot[reproductive.plot$bias.plot%in%c("mat", "AHB", "EHB", "pat"),])
reproductive.plot$bias.plot <- factor(reproductive.plot$bias.plot,
levels = c("NA","mat", "AHB", "EHB", "pat"))
reproductive.GLIMMIX.biased <- reproductive.GLIMMIX[reproductive.GLIMMIX$parent.dirpat.p<0.05|reproductive.GLIMMIX$lineageEHB.p<0.05,1]
reproductive.plot[!reproductive.plot$gene%in%reproductive.GLIMMIX.biased,"bias.plot"] <- "NA"
g2 <- ggplot(reproductive.plot, aes(x=p1, y=p2,
color=bias.plot,alpha=0.8)) +
geom_point() + theme_classic() +
xlab(expression(paste("% A allele in ",E[mother],
" x ",A[father],sep=""))) +
ylab(expression(paste("% A allele in ",A[mother],
" x ",E[father],sep=""))) +
ggtitle("reproductive workers") +
theme(text = element_text(size=20),
plot.title = element_text(hjust = 0.5)) +
scale_color_manual(breaks = levels(reproductive.plot$bias)[-c(1)],
values=c("grey", magma(20)[c(2)],
magma(20)[c(8)], magma(20)[c(12)],
magma(20)[c(16)]))+
guides(alpha=F, color=F)
g2
gmid.df <- data.frame(Sterile=c(length(unique(sterile.plot[sterile.plot$bias.plot=="mat","gene"])),
length(unique(sterile.plot[sterile.plot$bias.plot=="AHB","gene"])),
length(unique(sterile.plot[sterile.plot$bias.plot=="EHB","gene"])),
length(unique(sterile.plot[sterile.plot$bias.plot=="pat","gene"]))),
Bias=c("mat","AHB","EHB","pat"),
Reproductive=c(length(unique(reproductive.plot[reproductive.plot$bias.plot=="mat","gene"])),
length(unique(reproductive.plot[reproductive.plot$bias.plot=="AHB","gene"])),
length(unique(reproductive.plot[reproductive.plot$bias.plot=="EHB","gene"])),
length(unique(reproductive.plot[reproductive.plot$bias.plot=="pat","gene"]))))
## Test if # of sterile biased genes is different from reproductive biased genes
mat.test <- chisq.test(data.frame(Success=c(gmid.df[1,1],gmid.df[1,3]),
Failure=c(length(unique(sterile_counts$ID))-gmid.df[1,1],
length(unique(sterile_counts$ID))-gmid.df[1,3]),
row.names=c("Sterile","Reproductive")))$p.value
AHB.test <- chisq.test(data.frame(Success=c(gmid.df[2,1],gmid.df[2,3]),
Failure=c(length(unique(sterile_counts$ID))-gmid.df[2,1],
length(unique(sterile_counts$ID))-gmid.df[2,3]),
row.names=c("Sterile","Reproductive")))$p.value
## Warning in chisq.test(data.frame(Success = c(gmid.df[2, 1], gmid.df[2, 3]), :
## Chi-squared approximation may be incorrect
EHB.test <- chisq.test(data.frame(Success=c(gmid.df[3,1],gmid.df[3,3]),
Failure=c(length(unique(sterile_counts$ID))-gmid.df[3,1],
length(unique(sterile_counts$ID))-gmid.df[3,3]),
row.names=c("Sterile","Reproductive")))$p.value
pat.test <- chisq.test(data.frame(Success=c(gmid.df[4,1],gmid.df[4,3]),
Failure=c(length(unique(sterile_counts$ID))-gmid.df[4,1],
length(unique(sterile_counts$ID))-gmid.df[4,3]),
row.names=c("Sterile","Reproductive")))$p.value
## Build table
gmid.df$`.` <- c(mat.test,AHB.test,EHB.test,pat.test)
gmid.df <- gmid.df[,c(4,1,2,3)]
nsrows <- row.names(gmid.df[gmid.df$`.`>0.05,])
gmid.df$`.` <- formatC(gmid.df$`.`, format = "e", digits = 2)
gmid.df[nsrows,"."] <- "(ns)"
cols <- matrix("black", nrow(gmid.df), ncol(gmid.df))
cols[1,3] <- magma(20)[c(2)]
cols[2,3] <- magma(20)[c(8)]
cols[3,3] <- magma(20)[c(12)]
cols[4,3] <- magma(20)[c(16)]
ccols <- matrix("white", nrow(gmid.df), ncol(gmid.df))
ccols[1,3] <- "#e4d8d1"
ccols[2,3] <- "#e4d8d1"
ccols[3,3] <- "#e4d8d1"
ccols[4,3] <- "#e4d8d1"
ccols[1,2] <- "#f4efea"
ccols[2,2] <- "#f4efea"
ccols[3,2] <- "#f4efea"
ccols[4,2] <- "#f4efea"
ccols[1,4] <- "#f4efea"
ccols[2,4] <- "#f4efea"
ccols[3,4] <- "#f4efea"
ccols[4,4] <- "#f4efea"
cfonts <- matrix("plain", nrow(gmid.df), ncol(gmid.df))
cfonts[1,3] <- "bold"
cfonts[2,3] <- "bold"
cfonts[3,3] <- "bold"
cfonts[4,3] <- "bold"
tt <- ttheme_default(core=list(fg_params = list(col = cols,
cex = 1,
fontface = cfonts),
bg_params = list(col=NA,
fill = ccols),
padding.h=unit(2, "mm")),
rowhead=list(bg_params = list(col=NA)),
colhead=list(bg_params = list(fill = c("white",
"#f4efea",
"#e4d8d1",
"#f4efea")),
fg_params = list(rot=90,
cex = 1,
col = c("white",
"black",
"black",
"black"))))
gmid <- tableGrob(gmid.df, rows = NULL, theme=tt)
plot(gmid)
fig1 <- arrangeGrob(g1, gmid, g2, widths=c(5,2.5,5))
ggsave(file="fig1.png", plot=fig1, width=18, height=6)